This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(openxlsx) # needed for read.xlsx
library(readr) # for reading one csv file (walktimes)
vaxdf <- read.xlsx(xlsxFile = "./vaxdatabyfsa.xlsx",
sheet = 1,
colNames = TRUE,
detectDates = TRUE)
censusdf <- read.csv("./98-401-X2016046_English_CSV_data.csv", header= TRUE)
library(tidyr)
package 㤼㸱tidyr㤼㸲 was built under R version 4.0.5
library(dplyr)
package 㤼㸱dplyr㤼㸲 was built under R version 4.0.5
Attaching package: 㤼㸱dplyr㤼㸲
The following objects are masked from 㤼㸱package:stats㤼㸲:
filter, lag
The following objects are masked from 㤼㸱package:base㤼㸲:
intersect, setdiff, setequal, union
colnames(censusdf)[9:14] <- c("variable", "varnum", "varnote", "ntot", "nmale", "nfemale")
colnames(censusdf)
[1] "ï..CENSUS_YEAR" "GEO_CODE..POR." "GEO_LEVEL"
[4] "GEO_NAME" "GNR" "GNR_LF"
[7] "DATA_QUALITY_FLAG" "ALT_GEO_CODE" "variable"
[10] "varnum" "varnote" "ntot"
[13] "nmale" "nfemale"
library(tidyverse)
package 㤼㸱tidyverse㤼㸲 was built under R version 4.0.5Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages ----------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.5 v stringr 1.4.0
v tibble 3.1.3 v forcats 0.5.1
v purrr 0.3.4
package 㤼㸱ggplot2㤼㸲 was built under R version 4.0.5package 㤼㸱tibble㤼㸲 was built under R version 4.0.5package 㤼㸱forcats㤼㸲 was built under R version 4.0.5-- Conflicts -------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
censusdf_ont <- censusdf %>% filter(substr(censusdf$GEO_NAME, 1, 1) == "K" | substr(censusdf$GEO_NAME, 1, 1) == "L" | substr(censusdf$GEO_NAME, 1, 1) == "M" | substr(censusdf$GEO_NAME, 1, 1) == "N" | substr(censusdf$GEO_NAME, 1, 1) == "P")
rm(censusdf)
colnames(censusdf_ont)
[1] "ï..CENSUS_YEAR" "GEO_CODE..POR." "GEO_LEVEL"
[4] "GEO_NAME" "GNR" "GNR_LF"
[7] "DATA_QUALITY_FLAG" "ALT_GEO_CODE" "variable"
[10] "varnum" "varnote" "ntot"
[13] "nmale" "nfemale"
censusdf_ont <- censusdf_ont[,c(4,9,10,12)]
# censusdf_ontc <- censusdf_ont[censusdf_ont$varnum %in% !c(118:660, 872:1134)]
censusdf_ontc <- censusdf_ont[with(censusdf_ont, !((varnum %in% 118:660) | (varnum %in% 872:1134))), ]
rm(censusdf_ont)
censusdf_ontc <- censusdf_ontc[with(censusdf_ontc, !((varnum %in% 1715:1774) | (varnum %in% 1778:1836) | (varnum %in% 1884:1919) | (varnum %in% 1950:2229))), ]
censusdf_ontc <- censusdf_ontc[with(censusdf_ontc, !((varnum %in% 847:866) | (varnum %in% 1291:1322) | (varnum %in% 1338:1616))), ]
library(maditr)
package 㤼㸱maditr㤼㸲 was built under R version 4.0.5Registered S3 method overwritten by 'data.table':
method from
print.data.table
To modify variables or add new variables:
let(mtcars, new_var = 42, new_var2 = new_var*hp) %>% head()
Attaching package: 㤼㸱maditr㤼㸲
The following object is masked from 㤼㸱package:purrr㤼㸲:
transpose
The following objects are masked from 㤼㸱package:dplyr㤼㸲:
between, coalesce, first, last
The following object is masked from 㤼㸱package:readr㤼㸲:
cols
censusdf_ontc$ntot <- as.numeric(censusdf_ontc$ntot)
NAs introduced by coercion
# censusdf_wide <- dcast(censusdf_ontc, GEO_NAME ~ variable)
library(forcats)
censusdf_wide <- censusdf_ontc %>% dcast(GEO_NAME ~ fct_inorder(variable),
fun.aggregate = sum)
Using 'ntot' as value column. Use 'value.var' to override
colnames(censusdf_wide)[1] <- "FSA"
newdf <- inner_join(vaxdf, censusdf_wide, by = "FSA")
colnames(newdf)[3] <- "casesper100"
colnames(newdf)[4] <- "hospitper1000"
colnames(newdf)[5] <- "deathsper1000"
colnames(newdf)[6] <- "pctvax1dose"
colnames(newdf)[7] <- "pctvax2doses"
newdf$casesper100 <- as.numeric(newdf$casesper100)
NAs introduced by coercion
newdf$hospitper1000 <- as.numeric(newdf$hospitper1000)
NAs introduced by coercion
newdf$deathsper1000 <- as.numeric(newdf$deathsper1000)
NAs introduced by coercion
newdf$pctvax1dose <- as.numeric(newdf$pctvax1dose)
newdf$pctvax2doses <- as.numeric(newdf$pctvax2doses)
summary(lm(pctvax1dose ~ hospitper1000, data=newdf))
Call:
lm(formula = pctvax1dose ~ hospitper1000, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.234593 -0.030165 0.005122 0.032450 0.154055
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.695001 0.003863 179.932 <2e-16 ***
hospitper1000 -0.003948 0.001668 -2.367 0.0184 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04782 on 452 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.01224, Adjusted R-squared: 0.01006
F-statistic: 5.603 on 1 and 452 DF, p-value: 0.01835
summary(lm(pctvax2doses ~ hospitper1000, data=newdf))
Call:
lm(formula = pctvax2doses ~ hospitper1000, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.226494 -0.042671 0.003918 0.044763 0.143913
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.585060 0.004919 118.94 < 2e-16 ***
hospitper1000 -0.005608 0.002124 -2.64 0.00857 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0609 on 452 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.01519, Adjusted R-squared: 0.01301
F-statistic: 6.972 on 1 and 452 DF, p-value: 0.008567
vaccination rate is independent of cases, and death rate. It is negatively correlated to hospitalization rate, though R2 ~ 0.01 so this is uninformative.
colnames(newdf)[43] <- "medianage"
colnames(newdf)[44:46]
[1] "Total - Occupied private dwellings by structural type of dwelling - 100% data"
[2] "Single-detached house"
[3] "Apartment in a building that has five or more storeys"
newdf$pcthighrise <- newdf$`Apartment in a building that has five or more storeys`/newdf$`Total - Occupied private dwellings by structural type of dwelling - 100% data`
summary(lm(pctvax2doses ~ pcthighrise + medianage + hospitper1000, data=newdf))
Call:
lm(formula = pctvax2doses ~ pcthighrise + medianage + hospitper1000,
data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.196250 -0.038421 0.003832 0.041256 0.140845
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.4733907 0.0274751 17.230 < 2e-16 ***
pcthighrise 0.1087474 0.0144369 7.533 2.75e-13 ***
medianage 0.0024459 0.0006143 3.982 7.97e-05 ***
hospitper1000 -0.0101222 0.0022323 -4.534 7.42e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.05718 on 450 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.1357, Adjusted R-squared: 0.1299
F-statistic: 23.54 on 3 and 450 DF, p-value: 3.591e-14
colnames(newdf)[61] <- "avghhsize"
summary(lm(pctvax2doses ~ pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ pcthighrise + medianage + hospitper1000 +
avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.184784 -0.037082 -0.000318 0.037250 0.142395
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.2241197 0.0418803 5.351 1.39e-07 ***
pcthighrise 0.1988134 0.0180727 11.001 < 2e-16 ***
medianage 0.0046557 0.0006484 7.180 2.91e-12 ***
hospitper1000 -0.0179765 0.0023461 -7.662 1.13e-13 ***
avghhsize 0.0612219 0.0080830 7.574 2.08e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0539 on 449 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.2336, Adjusted R-squared: 0.2268
F-statistic: 34.21 on 4 and 449 DF, p-value: < 2.2e-16
colnames(newdf)[88:94]
[1] "Total - Private households by household type - 100% data"
[2] "One-census-family households"
[3] "Without children in a census family"
[4] "With children in a census family"
[5] "Multiple-census-family households"
[6] "Non-census-family households"
[7] "One-person households"
newdf$pcthhoneperson <- newdf$`One-person households`/newdf$`Total - Private households by household type - 100% data`
summary(lm(pctvax2doses ~ pcthhoneperson + pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ pcthhoneperson + pcthighrise + medianage +
hospitper1000 + avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.185769 -0.037004 -0.000455 0.037510 0.143446
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1561322 0.0940877 1.659 0.097728 .
pcthhoneperson 0.0697454 0.0864228 0.807 0.420081
pcthighrise 0.1948139 0.0187467 10.392 < 2e-16 ***
medianage 0.0049038 0.0007178 6.832 2.75e-11 ***
hospitper1000 -0.0186462 0.0024893 -7.490 3.69e-13 ***
avghhsize 0.0774281 0.0216483 3.577 0.000386 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.05392 on 448 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.2347, Adjusted R-squared: 0.2262
F-statistic: 27.48 on 5 and 448 DF, p-value: < 2.2e-16
colnames(newdf)[139] <- "pcrecgovtransfers"
summary(lm(pctvax2doses ~ pcrecgovtransfers + pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ pcrecgovtransfers + pcthighrise +
medianage + hospitper1000 + avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.15858 -0.02946 0.00068 0.02798 0.14631
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3656186 0.0347199 10.531 < 2e-16 ***
pcrecgovtransfers -0.0079784 0.0005031 -15.858 < 2e-16 ***
pcthighrise 0.0983645 0.0158044 6.224 1.12e-09 ***
medianage 0.0059794 0.0005262 11.364 < 2e-16 ***
hospitper1000 0.0014268 0.0022428 0.636 0.5250
avghhsize 0.0141342 0.0071242 1.984 0.0479 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04318 on 448 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.5091, Adjusted R-squared: 0.5037
F-statistic: 92.94 on 5 and 448 DF, p-value: < 2.2e-16
summary(lm(pctvax2doses ~ pcrecgovtransfers + medianage, data=newdf))
Call:
lm(formula = pctvax2doses ~ pcrecgovtransfers + medianage, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.168615 -0.028934 0.001299 0.030869 0.172473
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.4933931 0.0177872 27.739 <2e-16 ***
pcrecgovtransfers -0.0080716 0.0004369 -18.476 <2e-16 ***
medianage 0.0042474 0.0004385 9.686 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04748 on 509 degrees of freedom
Multiple R-squared: 0.413, Adjusted R-squared: 0.4107
F-statistic: 179.1 on 2 and 509 DF, p-value: < 2.2e-16
library(car)
Loading required package: carData
Attaching package: 㤼㸱car㤼㸲
The following object is masked from 㤼㸱package:purrr㤼㸲:
some
The following object is masked from 㤼㸱package:dplyr㤼㸲:
recode
scatterplot(pctvax2doses ~ pcrecgovtransfers, data=newdf)
pctvax2doses ~ pcrecgovtransfers + medianage, data=newdf
scatterplot(pctvax2doses ~ medianage, data=newdf)
colnames(newdf)[114] <- "medpersonalATincome"
newdf$logmedpersonalATincome <- log(newdf$medpersonalATincome)
summary(lm(pctvax2doses ~ logmedpersonalATincome + pcrecgovtransfers + pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ logmedpersonalATincome + pcrecgovtransfers +
pcthighrise + medianage + hospitper1000 + avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.156366 -0.028909 0.001334 0.029781 0.131117
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.2661947 0.2375899 5.329 1.57e-07 ***
logmedpersonalATincome -0.0830254 0.0216756 -3.830 0.000146 ***
pcrecgovtransfers -0.0106778 0.0008615 -12.394 < 2e-16 ***
pcthighrise 0.0765777 0.0165751 4.620 5.03e-06 ***
medianage 0.0064007 0.0005298 12.080 < 2e-16 ***
hospitper1000 0.0005218 0.0022219 0.235 0.814449
avghhsize 0.0062159 0.0073161 0.850 0.395987
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04254 on 447 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.5247, Adjusted R-squared: 0.5184
F-statistic: 82.26 on 6 and 447 DF, p-value: < 2.2e-16
colnames(newdf)[120] <- "medemployincome"
newdf$logmedemployincome <- log(newdf$medemployincome)
summary(lm(pctvax2doses ~ logmedpersonalATincome + pcrecgovtransfers + pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ logmedpersonalATincome + pcrecgovtransfers +
pcthighrise + medianage + hospitper1000 + avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.156366 -0.028909 0.001334 0.029781 0.131117
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.2661947 0.2375899 5.329 1.57e-07 ***
logmedpersonalATincome -0.0830254 0.0216756 -3.830 0.000146 ***
pcrecgovtransfers -0.0106778 0.0008615 -12.394 < 2e-16 ***
pcthighrise 0.0765777 0.0165751 4.620 5.03e-06 ***
medianage 0.0064007 0.0005298 12.080 < 2e-16 ***
hospitper1000 0.0005218 0.0022219 0.235 0.814449
avghhsize 0.0062159 0.0073161 0.850 0.395987
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04254 on 447 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.5247, Adjusted R-squared: 0.5184
F-statistic: 82.26 on 6 and 447 DF, p-value: < 2.2e-16
cor(newdf$logmedemployincome, newdf$logmedpersonalATincome)
[1] 0.8833125
colnames(newdf)[249] <- "pc0to17underLICO"
summary(lm(pctvax2doses ~ logmedpersonalATincome + pc0to17underLICO + pcrecgovtransfers + pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ logmedpersonalATincome + pc0to17underLICO +
pcrecgovtransfers + pcthighrise + medianage + hospitper1000 +
avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.165004 -0.028984 0.001421 0.029281 0.128106
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.148e+00 2.578e-01 4.452 1.08e-05 ***
logmedpersonalATincome -7.286e-02 2.332e-02 -3.125 0.00190 **
pc0to17underLICO 6.114e-04 5.187e-04 1.179 0.23912
pcrecgovtransfers -1.075e-02 8.635e-04 -12.452 < 2e-16 ***
pcthighrise 6.366e-02 1.986e-02 3.205 0.00145 **
medianage 6.586e-03 5.526e-04 11.920 < 2e-16 ***
hospitper1000 -5.532e-05 2.274e-03 -0.024 0.98060
avghhsize 7.074e-03 7.349e-03 0.963 0.33627
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04252 on 446 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.5262, Adjusted R-squared: 0.5188
F-statistic: 70.76 on 7 and 446 DF, p-value: < 2.2e-16
cor(newdf$pc0to17underLICO, newdf$pcrecgovtransfers)
[1] 0.2926835
cor(newdf$pc0to17underLICO, newdf$logmedpersonalATincome)
[1] -0.6058942
newdf$pcnotcitizens <- newdf[,257]/newdf[,253]
summary(lm(pctvax2doses ~ logmedpersonalATincome + pcnotcitizens + pcrecgovtransfers + pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ logmedpersonalATincome + pcnotcitizens +
pcrecgovtransfers + pcthighrise + medianage + hospitper1000 +
avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.164146 -0.027546 0.000029 0.030836 0.118962
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5259424 0.2826341 1.861 0.0634 .
logmedpersonalATincome -0.0124518 0.0261665 -0.476 0.6344
pcnotcitizens 0.3912118 0.0850047 4.602 5.46e-06 ***
pcrecgovtransfers -0.0087436 0.0009417 -9.285 < 2e-16 ***
pcthighrise 0.0058853 0.0223341 0.264 0.7923
medianage 0.0065656 0.0005195 12.638 < 2e-16 ***
hospitper1000 -0.0009550 0.0021969 -0.435 0.6640
avghhsize -0.0073755 0.0077417 -0.953 0.3413
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04161 on 446 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.5463, Adjusted R-squared: 0.5392
F-statistic: 76.71 on 7 and 446 DF, p-value: < 2.2e-16
cor(newdf$pcnotcitizens, newdf$logmedpersonalATincome)
[1] -0.3932005
pairs(~pcnotcitizens+logmedpersonalATincome+pcthighrise+medianage,data=newdf,
main="Simple Scatterplot Matrix")
keep checking out new variables.
newdf$pctfirstgen <- newdf[,353]/newdf[,352]
newdf$pctthirdgen <- newdf[,355]/newdf[,352]
summary(lm(pctvax2doses ~ pctfirstgen + pctthirdgen + pcrecgovtransfers + pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ pctfirstgen + pctthirdgen + pcrecgovtransfers +
pcthighrise + medianage + hospitper1000 + avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.16866 -0.02634 -0.00123 0.02809 0.11779
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3439584 0.0532550 6.459 2.77e-10 ***
pctfirstgen 0.3197549 0.0657643 4.862 1.61e-06 ***
pctthirdgen 0.1440540 0.0519282 2.774 0.00577 **
pcrecgovtransfers -0.0086127 0.0005758 -14.957 < 2e-16 ***
pcthighrise -0.0081255 0.0216251 -0.376 0.70729
medianage 0.0057187 0.0005026 11.378 < 2e-16 ***
hospitper1000 -0.0009609 0.0025972 -0.370 0.71158
avghhsize -0.0248831 0.0090714 -2.743 0.00633 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04113 on 446 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.5567, Adjusted R-squared: 0.5497
F-statistic: 80 on 7 and 446 DF, p-value: < 2.2e-16
newdf$pctaborid <- newdf[,364]/newdf[,363]
newdf$pctvismin <- newdf[,366]/newdf[,365]
newdf$pctblack <- newdf[,369]/newdf[,365]
newdf$pctsouthasian <- newdf[,367]/newdf[,365]
newdf$pctchinese <- newdf[,368]/newdf[,365]
newdf$pctlatam <- newdf[,371]/newdf[,365]
newdf$pctarab <- newdf[,372]/newdf[,365]
colnames(newdf[,530:538])
[1] "pctfirstgen" "pctthirdgen" "pctaborid" "pctvismin" "pctblack"
[6] "pctsouthasian" "pctchinese" "pctlatam" "pctarab"
summary(lm(pctvax2doses ~ pctfirstgen + pctthirdgen + pctaborid + pctvismin + pctblack + pctsouthasian + pctchinese + pctlatam + pctarab + pcrecgovtransfers + pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ pctfirstgen + pctthirdgen + pctaborid +
pctvismin + pctblack + pctsouthasian + pctchinese + pctlatam +
pctarab + pcrecgovtransfers + pcthighrise + medianage + hospitper1000 +
avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.168270 -0.023453 0.000516 0.025106 0.126345
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5207206 0.0563922 9.234 < 2e-16 ***
pctfirstgen -0.1515363 0.0973836 -1.556 0.12041
pctthirdgen -0.0270206 0.0563980 -0.479 0.63210
pctaborid 0.0834871 0.0389853 2.142 0.03278 *
pctvismin 0.2186631 0.0778791 2.808 0.00521 **
pctblack -0.1914336 0.1052227 -1.819 0.06954 .
pctsouthasian 0.0327629 0.0708709 0.462 0.64410
pctchinese 0.0663543 0.0696429 0.953 0.34123
pctlatam 0.1352767 0.1953989 0.692 0.48911
pctarab -0.1330775 0.1076203 -1.237 0.21692
pcrecgovtransfers -0.0091363 0.0005847 -15.626 < 2e-16 ***
pcthighrise -0.0044161 0.0215274 -0.205 0.83756
medianage 0.0066580 0.0005564 11.966 < 2e-16 ***
hospitper1000 0.0039991 0.0029953 1.335 0.18254
avghhsize -0.0470484 0.0098318 -4.785 2.34e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.03842 on 439 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.6193, Adjusted R-squared: 0.6071
F-statistic: 51 on 14 and 439 DF, p-value: < 2.2e-16
# Make a rural/urban categorical
#
newdf$urban <- "TRUE"
newdf[grep("K0", newdf$FSA), "urban" ] <- "FALSE"
newdf[grep("L0", newdf$FSA), "urban" ] <- "FALSE"
newdf[grep("N0", newdf$FSA), "urban" ] <- "FALSE"
newdf[grep("P0", newdf$FSA), "urban" ] <- "FALSE"
summary(lm(pctvax2doses ~ urban + pctfirstgen + pctthirdgen + pctaborid + pctvismin + pctblack + pctsouthasian + pctchinese + pctlatam + pctarab + pcrecgovtransfers + pcthighrise + medianage + hospitper1000 + avghhsize, data=newdf))
Call:
lm(formula = pctvax2doses ~ urban + pctfirstgen + pctthirdgen +
pctaborid + pctvismin + pctblack + pctsouthasian + pctchinese +
pctlatam + pctarab + pcrecgovtransfers + pcthighrise + medianage +
hospitper1000 + avghhsize, data = newdf)
Residuals:
Min 1Q Median 3Q Max
-0.173081 -0.022744 -0.000254 0.024278 0.111609
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.4279144 0.0589060 7.264 1.73e-12 ***
urbanTRUE 0.0306238 0.0067937 4.508 8.42e-06 ***
pctfirstgen -0.1213671 0.0955437 -1.270 0.20466
pctthirdgen 0.0115863 0.0558570 0.207 0.83577
pctaborid 0.1077672 0.0385331 2.797 0.00539 **
pctvismin 0.2045475 0.0762844 2.681 0.00761 **
pctblack -0.1800417 0.1030121 -1.748 0.08120 .
pctsouthasian 0.0411539 0.0693861 0.593 0.55341
pctchinese 0.0778169 0.0682067 1.141 0.25454
pctlatam 0.1732699 0.1914220 0.905 0.36587
pctarab -0.1392186 0.1053365 -1.322 0.18697
pcrecgovtransfers -0.0093040 0.0005734 -16.225 < 2e-16 ***
pcthighrise 0.0055626 0.0211848 0.263 0.79300
medianage 0.0070574 0.0005517 12.792 < 2e-16 ***
hospitper1000 0.0044206 0.0029330 1.507 0.13248
avghhsize -0.0387472 0.0097970 -3.955 8.92e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.0376 on 438 degrees of freedom
(58 observations deleted due to missingness)
Multiple R-squared: 0.6362, Adjusted R-squared: 0.6237
F-statistic: 51.05 on 15 and 438 DF, p-value: < 2.2e-16
bring in my FSA map.
library(sf) # for simple features
package 㤼㸱sf㤼㸲 was built under R version 4.0.5Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
FSA.sf <- st_read("./FSAGIS", layer = "lfsa000a16a_e")
Reading layer `lfsa000a16a_e' from data source `C:\Users\me\Documents\GitHub\vaxdata\FSAGIS' using driver `ESRI Shapefile'
Simple feature collection with 1620 features and 3 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 3658201 ymin: 658873 xmax: 9019157 ymax: 6083005
Projected CRS: PCS_Lambert_Conformal_Conic
FSA.sf <- FSA.sf %>% filter(substr(FSA.sf$CFSAUID, 1, 1) == "K" | substr(FSA.sf$CFSAUID, 1, 1) == "L" | substr(FSA.sf$CFSAUID, 1, 1) == "M" | substr(FSA.sf$CFSAUID, 1, 1) == "N" | substr(FSA.sf$CFSAUID, 1, 1) == "P")
library(tmap)
package 㤼㸱tmap㤼㸲 was built under R version 4.0.5Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
tm_shape(FSA.sf) +
tm_polygons("CFSAUID")
colnames(FSA.sf)[1] <- "FSA"
FSA.sf <- merge(FSA.sf, newdf, by = "FSA", all=TRUE)
FSA.sf <- merge(FSA.sf, censusdf_wide, by = "FSA", all=TRUE)
tmap_mode("view")
tmap mode set to interactive viewing
tm_shape(FSA.sf) +
tm_polygons(col = "casesper100" ,
legend.hist = TRUE) +
tm_layout(legend.outside = TRUE)
The shape FSA.sf contains empty units.